# NOTE(review): wiping the global environment is kept for backward
# compatibility, but it also deletes the caller's objects when this file is
# sourced into an existing session.
rm(list = ls())

# Packages attached up front. Packages used further down via pkg::fun() or a
# later library() call (e.g. imputeTS, skimr, Hmisc, tidyr) are not listed here.
.packages <- c(
  "car", "doBy", "lubridate", "VIM", "mi", "mice", "Amelia", "naniar", "dplyr"
)

# installed.packages() returns a matrix; match against its row names (the
# package names) rather than against every cell of the matrix.
.inst <- .packages %in% rownames(installed.packages())
if (any(!.inst)) install.packages(.packages[!.inst])

# library() stops with an error if a package cannot be attached, whereas
# require() only returns FALSE and lets the script fail later.
invisible(lapply(.packages, library, character.only = TRUE))
NA = Not Available
vector with missing values
# Example numeric vector containing NA entries (the 99s are recoded to NA further down).
x <- c(1, 99, 3, NA, 5, 5, NA, 99, 3, 3, NA, 1, 3, 5, 1, 1 )
# Comparisons are element-wise; every NA element yields NA rather than TRUE/FALSE.
x < 3
## [1] TRUE FALSE FALSE NA FALSE FALSE NA FALSE FALSE FALSE NA TRUE
## [13] FALSE FALSE TRUE TRUE
x == 99
## [1] FALSE TRUE FALSE NA FALSE FALSE NA TRUE FALSE FALSE NA FALSE
## [13] FALSE FALSE FALSE FALSE
NA cannot be used in comparisons
# Comparing against NA with == gives NA for every element; use is.na(x) to test for missingness.
x == NA
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
NA + anything = NA
# Arithmetic with NA propagates the NA.
NA + 3
## [1] NA
# NA | TRUE is TRUE: the result is TRUE whatever the missing value is.
NA | TRUE
## [1] TRUE
# NA | FALSE stays NA: the result depends on the unknown value.
NA | FALSE
## [1] NA
na actions in functions
# Without na.rm, a single NA makes the whole sum NA.
sum(x)
## [1] NA
# na.rm = TRUE drops the NAs first. Spell out TRUE: T is an ordinary variable
# that can be reassigned.
sum(x, na.rm = TRUE)
## [1] 229
NaN = Not a Number
Inf = Infinity
-Inf = Negative Infinity
# Mixing NA and NaN in arithmetic printed NA in both operand orders here.
NA + NaN
## [1] NA
NaN + NA
## [1] NA
vector
x <- c(1, 99, 3, NA, 5, 5, NA, 99, 3, 3, NA, 1, 3, 5, 1, 1 )
data.frame with missing values:
# Load the example data frames used throughout the rest of the file.
library(datasets)
data(airquality)
# NOTE(review): no package= is given, so which `sleep` dataset gets loaded
# depends on the attached packages — confirm the intended one.
data(sleep)
time series
# Attach imputeTS, then print tsAirgap, a monthly series that contains NAs (see the output below).
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
tsAirgap
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 NA 135 148 148 NA 119 104 118
## 1950 115 126 141 135 125 149 170 170 NA 133 NA 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 NA 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 NA NA NA 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 NA 465 467 404 347 NA 336
## 1958 340 318 NA 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 NA
## 1960 417 391 419 461 NA 535 622 606 508 461 390 432
spatial
spatio-temporal
library(cutoffR)
#hqmr.data
summary functions
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
skimr::skim(airquality)
| Name | airquality |
| Number of rows | 153 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Ozone | 37 | 0.76 | 42.13 | 32.99 | 1.0 | 18.00 | 31.5 | 63.25 | 168.0 | ▇▃▂▁▁ |
| Solar.R | 7 | 0.95 | 185.93 | 90.06 | 7.0 | 115.75 | 205.0 | 258.75 | 334.0 | ▅▃▅▇▅ |
| Wind | 0 | 1.00 | 9.96 | 3.52 | 1.7 | 7.40 | 9.7 | 11.50 | 20.7 | ▂▇▇▃▁ |
| Temp | 0 | 1.00 | 77.88 | 9.47 | 56.0 | 72.00 | 79.0 | 85.00 | 97.0 | ▂▃▇▇▃ |
| Month | 0 | 1.00 | 6.99 | 1.42 | 5.0 | 6.00 | 7.0 | 8.00 | 9.0 | ▇▇▇▇▇ |
| Day | 0 | 1.00 | 15.80 | 8.86 | 1.0 | 8.00 | 16.0 | 23.00 | 31.0 | ▇▇▇▇▆ |
Hmisc::describe(airquality)
## airquality
##
## 6 Variables 153 Observations
## --------------------------------------------------------------------------------
## Ozone
## n missing distinct Info Mean Gmd .05 .10
## 116 37 67 0.999 42.13 35.28 7.75 11.00
## .25 .50 .75 .90 .95
## 18.00 31.50 63.25 87.00 108.50
##
## lowest : 1 4 6 7 8, highest: 115 118 122 135 168
## --------------------------------------------------------------------------------
## Solar.R
## n missing distinct Info Mean Gmd .05 .10
## 146 7 117 1 185.9 102.7 24.25 47.50
## .25 .50 .75 .90 .95
## 115.75 205.00 258.75 288.50 311.50
##
## lowest : 7 8 13 14 19, highest: 320 322 323 332 334
## --------------------------------------------------------------------------------
## Wind
## n missing distinct Info Mean Gmd .05 .10
## 153 0 31 0.997 9.958 3.964 4.60 5.82
## .25 .50 .75 .90 .95
## 7.40 9.70 11.50 14.90 15.50
##
## lowest : 1.7 2.3 2.8 3.4 4.0, highest: 16.1 16.6 18.4 20.1 20.7
## --------------------------------------------------------------------------------
## Temp
## n missing distinct Info Mean Gmd .05 .10
## 153 0 40 0.999 77.88 10.74 60.2 64.2
## .25 .50 .75 .90 .95
## 72.0 79.0 85.0 90.0 92.0
##
## lowest : 56 57 58 59 61, highest: 92 93 94 96 97
## --------------------------------------------------------------------------------
## Month
## n missing distinct Info Mean Gmd
## 153 0 5 0.96 6.993 1.608
##
## lowest : 5 6 7 8 9, highest: 5 6 7 8 9
##
## Value 5 6 7 8 9
## Frequency 31 30 31 31 30
## Proportion 0.203 0.196 0.203 0.203 0.196
## --------------------------------------------------------------------------------
## Day
## n missing distinct Info Mean Gmd .05 .10
## 153 0 31 0.999 15.8 10.26 2.0 4.0
## .25 .50 .75 .90 .95
## 8.0 16.0 23.0 28.0 29.4
##
## lowest : 1 2 3 4 5, highest: 27 28 29 30 31
## --------------------------------------------------------------------------------
logical
base::is.na(), anyNA()
naniar::is_na(), any_na(), any_miss(), any_complete()
is.na(x)
## [1] FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
## [13] FALSE FALSE FALSE FALSE
is.na(airquality)
## Ozone Solar.R Wind Temp Month Day
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] TRUE TRUE FALSE FALSE FALSE FALSE
## [6,] FALSE TRUE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] TRUE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE TRUE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] TRUE FALSE FALSE FALSE FALSE FALSE
## [26,] TRUE FALSE FALSE FALSE FALSE FALSE
## [27,] TRUE TRUE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] TRUE FALSE FALSE FALSE FALSE FALSE
## [33,] TRUE FALSE FALSE FALSE FALSE FALSE
## [34,] TRUE FALSE FALSE FALSE FALSE FALSE
## [35,] TRUE FALSE FALSE FALSE FALSE FALSE
## [36,] TRUE FALSE FALSE FALSE FALSE FALSE
## [37,] TRUE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] TRUE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE FALSE FALSE FALSE
## [42,] TRUE FALSE FALSE FALSE FALSE FALSE
## [43,] TRUE FALSE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE FALSE FALSE FALSE
## [45,] TRUE FALSE FALSE FALSE FALSE FALSE
## [46,] TRUE FALSE FALSE FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE FALSE FALSE FALSE
## [52,] TRUE FALSE FALSE FALSE FALSE FALSE
## [53,] TRUE FALSE FALSE FALSE FALSE FALSE
## [54,] TRUE FALSE FALSE FALSE FALSE FALSE
## [55,] TRUE FALSE FALSE FALSE FALSE FALSE
## [56,] TRUE FALSE FALSE FALSE FALSE FALSE
## [57,] TRUE FALSE FALSE FALSE FALSE FALSE
## [58,] TRUE FALSE FALSE FALSE FALSE FALSE
## [59,] TRUE FALSE FALSE FALSE FALSE FALSE
## [60,] TRUE FALSE FALSE FALSE FALSE FALSE
## [61,] TRUE FALSE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE FALSE FALSE FALSE
## [65,] TRUE FALSE FALSE FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE FALSE FALSE FALSE
## [72,] TRUE FALSE FALSE FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE FALSE FALSE FALSE
## [75,] TRUE FALSE FALSE FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE FALSE FALSE FALSE
## [83,] TRUE FALSE FALSE FALSE FALSE FALSE
## [84,] TRUE FALSE FALSE FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE FALSE FALSE FALSE
## [96,] FALSE TRUE FALSE FALSE FALSE FALSE
## [97,] FALSE TRUE FALSE FALSE FALSE FALSE
## [98,] FALSE TRUE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE FALSE FALSE FALSE
## [102,] TRUE FALSE FALSE FALSE FALSE FALSE
## [103,] TRUE FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE FALSE FALSE FALSE
## [107,] TRUE FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE FALSE FALSE FALSE
## [115,] TRUE FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE FALSE FALSE FALSE
## [119,] TRUE FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE FALSE FALSE FALSE
## [141,] FALSE FALSE FALSE FALSE FALSE FALSE
## [142,] FALSE FALSE FALSE FALSE FALSE FALSE
## [143,] FALSE FALSE FALSE FALSE FALSE FALSE
## [144,] FALSE FALSE FALSE FALSE FALSE FALSE
## [145,] FALSE FALSE FALSE FALSE FALSE FALSE
## [146,] FALSE FALSE FALSE FALSE FALSE FALSE
## [147,] FALSE FALSE FALSE FALSE FALSE FALSE
## [148,] FALSE FALSE FALSE FALSE FALSE FALSE
## [149,] FALSE FALSE FALSE FALSE FALSE FALSE
## [150,] TRUE FALSE FALSE FALSE FALSE FALSE
## [151,] FALSE FALSE FALSE FALSE FALSE FALSE
## [152,] FALSE FALSE FALSE FALSE FALSE FALSE
## [153,] FALSE FALSE FALSE FALSE FALSE FALSE
is.na(tsAirgap)
## [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## [109] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [133] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
naniar::any_na(airquality)
## [1] TRUE
complete.cases(airquality)
## [1] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE
## [49] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [73] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [97] FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
numerical summaries
# Total number of NA cells in the data frame.
VIM::countNA(airquality)
## [1] 44
# any_na() / any_miss(): is there at least one missing value?
naniar::any_na(airquality)
## [1] TRUE
naniar::any_miss(airquality)
## [1] TRUE
# n_miss() counts missing cells, n_complete() counts non-missing cells
# (44 + 874 = 153 rows * 6 columns).
naniar::n_miss(airquality)
## [1] 44
naniar::n_complete(airquality)
## [1] 874
When dealing with missing values, you might want to replace certain values with a missing value (NA). This is useful when you know the origin of the data and can be certain which values should be missing. For example, you might know that all values of “N/A”, “N A”, and “Not Available”, or -99, or -1 are supposed to be missing.
# Recode the sentinel value 99 as missing.
is.na(x) <- which(x == 99)
x
## [1] 1 NA 3 NA 5 5 NA NA 3 3 NA 1 3 5 1 1
# Recode every 5 as missing as well.
x[which(x == 5)] <- NA
x
## [1] 1 NA 3 NA NA NA NA NA 3 3 NA 1 3 NA 1 1
tidyr::replace_na(): a missing value is turned into a regular value (NA –> -99)
# replace_na() needs the replacement value; without it the vector comes back
# unchanged. Replace every NA with the sentinel -99 (x itself is not modified,
# because the result is not assigned).
tidyr::replace_na(x, -99)
## [1] 1 -99 3 -99 -99 -99 -99 -99 3 3 -99 1 3 -99 1 1
dplyr
dplyr::na_if()
naniar::replace_with_na(): Value becomes a missing value (-99 –> NA) , data.frame
replace_with_na(data, replace = list(), ...)
replace_with_na_all()
replace_with_na_at()
replace_with_na_if()
base::is.na(), is.na.data.frame(), is.na.numeric_version(), is.na.POSIXlt()
naniar::any_na(), any_miss()
stats::complete.cases()
naniar::n_miss(), n_complete(), pct_miss()
# any_na() reports NaN as missing, but not NULL or Inf (see the outputs below).
naniar::any_na(NaN)
## [1] TRUE
naniar::any_na(NULL)
## [1] FALSE
naniar::any_na(Inf)
## [1] FALSE
# Current state of x after the recoding above.
x
## [1] 1 NA 3 NA NA NA NA NA 3 3 NA 1 3 NA 1 1
complete.cases(x) # complete (non-missing) entries
## [1] TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE
## [13] TRUE FALSE TRUE TRUE
# For a plain vector, is.na(x) and !complete.cases(x) give the same mask.
is.na(x)
## [1] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE
## [13] FALSE TRUE FALSE FALSE
!complete.cases(x)
## [1] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE
## [13] FALSE TRUE FALSE FALSE
# Counts and percentages of missing / non-missing entries (8 of 16 each).
naniar::n_miss(x)
## [1] 8
naniar::n_complete(x)
## [1] 8
naniar::pct_miss(x)
## [1] 50
naniar::pct_complete(x)
## [1] 50
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
dim(airquality)
## [1] 153 6
rows without any missing data
airquality[complete.cases(airquality), ]
dim(airquality[complete.cases(airquality), ])
## [1] 111 6
rows with at least one missing value
airquality[!complete.cases(airquality), ]
dim(airquality[!complete.cases(airquality), ])
## [1] 42 6
since logical values are numerically 1 (TRUE) and 0 (FALSE),
sum(is.na(airquality$Ozone)) # number of missing observations in the Ozone variable
## [1] 37
VIM::countNA(airquality$Ozone) # same count using the VIM package
## [1] 37
# Proportion of missing observations in Ozone. The earlier sleep$Dream version
# printed NaN (the loaded `sleep` evidently has no Dream column), so use
# airquality like the surrounding examples.
mean(is.na(airquality$Ozone))
## [1] 0.2418301
# Proportion of rows that contain at least one missing value.
mean(!complete.cases(airquality))
## [1] 0.2745098
na.omit, na.exclude, na.pass, na.fail, na.action, na.rm, na.last, useNA
# mean() returns NA as soon as the input contains a missing value.
mean(airquality$Ozone)
## [1] NA
# Two equivalent ways to ignore the NAs: na.rm = TRUE, or dropping them first with na.omit().
mean(airquality$Ozone,na.rm=TRUE)
## [1] 42.12931
mean(na.omit(airquality$Ozone))
## [1] 42.12931
na.omit(airquality$Ozone)
## [1] 41 36 12 18 28 23 19 8 7 16 11 14 18 14 34 6 30 11
## [19] 1 11 4 32 23 45 115 37 29 71 39 23 21 37 20 12 13 135
## [37] 49 32 64 40 77 97 97 85 10 27 7 48 35 61 79 63 16 80
## [55] 108 20 52 82 50 64 59 39 9 16 78 35 66 122 89 110 44 28
## [73] 65 22 59 23 31 44 21 9 45 168 73 76 118 84 85 96 78 73
## [91] 91 47 32 20 23 21 24 44 21 28 9 13 46 18 13 24 16 13
## [109] 23 36 7 14 30 14 18 20
## attr(,"na.action")
## [1] 5 10 25 26 27 32 33 34 35 36 37 39 42 43 45 46 52 53 54
## [20] 55 56 57 58 59 60 61 65 72 75 83 84 102 103 107 115 119 150
## attr(,"class")
## [1] "omit"
airquality$Ozone
## [1] 41 36 12 18 NA 28 23 19 8 NA 7 16 11 14 18 14 34 6
## [19] 30 11 1 11 4 32 NA NA NA 23 45 115 37 NA NA NA NA NA
## [37] NA 29 NA 71 39 NA NA 23 NA NA 21 37 20 12 13 NA NA NA
## [55] NA NA NA NA NA NA NA 135 49 32 NA 64 40 77 97 97 85 NA
## [73] 10 27 NA 7 48 35 61 79 63 16 NA NA 80 108 20 52 82 50
## [91] 64 59 39 9 16 78 35 66 122 89 110 NA NA 44 28 65 NA 22
## [109] 59 23 31 44 21 9 NA 45 168 73 NA 76 118 84 85 96 78 73
## [127] 91 47 32 20 23 21 24 44 21 28 9 13 46 18 13 24 16 13
## [145] 23 36 7 14 30 NA 14 18 20
na.omit(airquality)
returns the object with observations removed if they contain any missing values
na.exclude(airquality)
differences between omitting and excluding NAs can be seen in some prediction and residual functions
# na.fail() signals an error when the object contains any NA, and airquality
# does; catch the error and report it so the script keeps running.
tryCatch(
  na.fail(airquality),
  error = function(e) message("na.fail(): ", conditionMessage(e))
)
returns the object only if it contains no missing values
na.pass(airquality)
returns the object unchanged
# Fit the same regression twice; only the na.action differs. The printed
# coefficients are identical, the difference shows up in resid()/fitted().
omit.model<- lm(Ozone ~ Solar.R, data = airquality, na.action = na.omit)
exclude.model<- lm(Ozone ~ Solar.R, data = airquality, na.action = na.exclude)
omit.model
##
## Call:
## lm(formula = Ozone ~ Solar.R, data = airquality, na.action = na.omit)
##
## Coefficients:
## (Intercept) Solar.R
## 18.5987 0.1272
exclude.model
##
## Call:
## lm(formula = Ozone ~ Solar.R, data = airquality, na.action = na.exclude)
##
## Coefficients:
## (Intercept) Solar.R
## 18.5987 0.1272
resid(omit.model)
## 1 2 3 4 7 8
## -1.7601294 2.3957702 -25.5463532 -40.4014578 -33.6211440 -12.1880897
## 9 12 13 14 15 16
## -13.0148679 -35.1530373 -44.4766565 -39.4420122 -8.8644704 -47.0719285
## 17 18 19 20 21 22
## -23.6384662 -22.5176190 -29.5459452 -13.1939997 -18.6160499 -48.2916147
## 23 24 28 29 30 31
## -17.7778596 1.7020672 2.7481237 -5.6443762 68.0434167 -17.0778386
## 38 40 41 44 47 48
## -5.7487173 15.3961782 -20.6731105 -14.4191880 -21.8872947 -17.7136649
## 49 50 51 62 63 64
## -3.3038428 -21.8585604 -23.0203700 82.1938142 -1.1357151 -16.6097319
## 66 67 68 69 70 71
## 23.1473497 -18.5286231 23.3036573 44.4481447 43.8123183 44.1473497
## 73 74 76 77 78 79
## -42.1703595 -13.8526503 -17.7026608 -3.6616984 -18.4420122 6.1591698
## 80 81 82 85 86 87
## 36.6213664 16.4249125 -3.4888847 24.0146824 61.0434167 -8.8991148
## 88 89 90 91 92 93
## 22.9737200 36.3150694 -3.5691775 13.2284585 8.1012932 9.8465547
## 94 95 99 100 101 104
## -12.6506943 -12.3904537 70.9741280 41.2804250 65.0780610 0.9855401
## 105 106 108 109 110 111
## -25.3148469 26.4363246 -5.6274621 33.9158434 -10.2227340 -18.6270541
## 112 113 114 116 117 118
## 1.2398706 -30.5345331 -14.1766776 -0.5577654 119.1359376 27.0607388
## 120 121 122 123 124 125
## 31.5867221 70.7890861 35.2631028 42.4942012 56.1646719 34.3497137
## 126 127 128 129 130 131
## 31.1300275 48.3670359 16.3205714 1.7020672 -30.6443762 -23.5750875
## 132 133 134 135 136 137
## -26.8467403 -27.5345331 -4.6097319 -30.5345331 -20.8640624 -12.6506943
## 138 139 140 141 142 143
## -19.8412382 -2.7368972 -29.0837486 -9.0321901 -24.8640624 -28.1589474
## 144 145 146 147 148 149
## -35.8640624 2.6209584 -0.2747005 -17.8298261 -7.1420332 -13.1416252
## 151 152 153
## -28.8872947 -17.2573784 -26.9565833
resid(exclude.model)
## 1 2 3 4 5 6
## -1.7601294 2.3957702 -25.5463532 -40.4014578 NA NA
## 7 8 9 10 11 12
## -33.6211440 -12.1880897 -13.0148679 NA NA -35.1530373
## 13 14 15 16 17 18
## -44.4766565 -39.4420122 -8.8644704 -47.0719285 -23.6384662 -22.5176190
## 19 20 21 22 23 24
## -29.5459452 -13.1939997 -18.6160499 -48.2916147 -17.7778596 1.7020672
## 25 26 27 28 29 30
## NA NA NA 2.7481237 -5.6443762 68.0434167
## 31 32 33 34 35 36
## -17.0778386 NA NA NA NA NA
## 37 38 39 40 41 42
## NA -5.7487173 NA 15.3961782 -20.6731105 NA
## 43 44 45 46 47 48
## NA -14.4191880 NA NA -21.8872947 -17.7136649
## 49 50 51 52 53 54
## -3.3038428 -21.8585604 -23.0203700 NA NA NA
## 55 56 57 58 59 60
## NA NA NA NA NA NA
## 61 62 63 64 65 66
## NA 82.1938142 -1.1357151 -16.6097319 NA 23.1473497
## 67 68 69 70 71 72
## -18.5286231 23.3036573 44.4481447 43.8123183 44.1473497 NA
## 73 74 75 76 77 78
## -42.1703595 -13.8526503 NA -17.7026608 -3.6616984 -18.4420122
## 79 80 81 82 83 84
## 6.1591698 36.6213664 16.4249125 -3.4888847 NA NA
## 85 86 87 88 89 90
## 24.0146824 61.0434167 -8.8991148 22.9737200 36.3150694 -3.5691775
## 91 92 93 94 95 96
## 13.2284585 8.1012932 9.8465547 -12.6506943 -12.3904537 NA
## 97 98 99 100 101 102
## NA NA 70.9741280 41.2804250 65.0780610 NA
## 103 104 105 106 107 108
## NA 0.9855401 -25.3148469 26.4363246 NA -5.6274621
## 109 110 111 112 113 114
## 33.9158434 -10.2227340 -18.6270541 1.2398706 -30.5345331 -14.1766776
## 115 116 117 118 119 120
## NA -0.5577654 119.1359376 27.0607388 NA 31.5867221
## 121 122 123 124 125 126
## 70.7890861 35.2631028 42.4942012 56.1646719 34.3497137 31.1300275
## 127 128 129 130 131 132
## 48.3670359 16.3205714 1.7020672 -30.6443762 -23.5750875 -26.8467403
## 133 134 135 136 137 138
## -27.5345331 -4.6097319 -30.5345331 -20.8640624 -12.6506943 -19.8412382
## 139 140 141 142 143 144
## -2.7368972 -29.0837486 -9.0321901 -24.8640624 -28.1589474 -35.8640624
## 145 146 147 148 149 150
## 2.6209584 -0.2747005 -17.8298261 -7.1420332 -13.1416252 NA
## 151 152 153
## -28.8872947 -17.2573784 -26.9565833
# The two residual vectors have different lengths (na.omit drops the NA rows,
# na.exclude pads them with NA), so data.frame() fails. Report the error
# instead of aborting the script.
tryCatch(
  data.frame(resid(omit.model), resid(exclude.model)),
  error = function(e) message("data.frame(): ", conditionMessage(e))
)
fitted(omit.model)
## 1 2 3 4 7 8 9 12
## 42.76013 33.60423 37.54635 58.40146 56.62114 31.18809 21.01487 51.15304
## 13 14 15 16 17 18 19 20
## 55.47666 53.44201 26.86447 61.07193 57.63847 28.51762 59.54595 24.19400
## 21 22 23 24 28 29 30 31
## 19.61605 59.29161 21.77786 30.29793 20.25188 50.64438 46.95658 54.07784
## 38 40 41 44 47 48 49 50
## 34.74872 55.60382 59.67311 37.41919 42.88729 54.71366 23.30384 33.85856
## 51 62 63 64 66 67 68 69
## 36.02037 52.80619 50.13572 48.60973 40.85265 58.52862 53.69634 52.55186
## 70 71 73 74 76 77 78 79
## 53.18768 40.85265 52.17036 40.85265 24.70266 51.66170 53.44201 54.84083
## 80 81 82 85 86 87 88 89
## 42.37863 46.57509 19.48888 55.98532 46.95658 28.89911 29.02628 45.68493
## 90 91 92 93 94 95 99 100
## 53.56918 50.77154 50.89871 29.15345 21.65069 28.39045 51.02587 47.71957
## 101 104 105 106 108 109 110 111
## 44.92194 43.01446 53.31485 38.56368 27.62746 25.08416 33.22273 49.62705
## 112 113 114 116 117 118 120 121
## 42.76013 51.53453 23.17668 45.55777 48.86406 45.93926 44.41328 47.21091
## 122 123 124 125 126 127 128 129
## 48.73690 42.50580 39.83533 43.65029 41.86997 42.63296 30.67943 30.29793
## 130 131 132 133 134 135 136 137
## 50.64438 46.57509 47.84674 51.53453 48.60973 51.53453 48.86406 21.65069
## 138 139 140 141 142 143 144 145
## 32.84124 48.73690 47.08375 22.03219 48.86406 44.15895 48.86406 20.37904
## 146 147 148 149 151 152 153
## 36.27470 24.82983 21.14203 43.14163 42.88729 35.25738 46.95658
fitted(exclude.model)
## 1 2 3 4 5 6 7 8
## 42.76013 33.60423 37.54635 58.40146 NA NA 56.62114 31.18809
## 9 10 11 12 13 14 15 16
## 21.01487 NA NA 51.15304 55.47666 53.44201 26.86447 61.07193
## 17 18 19 20 21 22 23 24
## 57.63847 28.51762 59.54595 24.19400 19.61605 59.29161 21.77786 30.29793
## 25 26 27 28 29 30 31 32
## NA NA NA 20.25188 50.64438 46.95658 54.07784 NA
## 33 34 35 36 37 38 39 40
## NA NA NA NA NA 34.74872 NA 55.60382
## 41 42 43 44 45 46 47 48
## 59.67311 NA NA 37.41919 NA NA 42.88729 54.71366
## 49 50 51 52 53 54 55 56
## 23.30384 33.85856 36.02037 NA NA NA NA NA
## 57 58 59 60 61 62 63 64
## NA NA NA NA NA 52.80619 50.13572 48.60973
## 65 66 67 68 69 70 71 72
## NA 40.85265 58.52862 53.69634 52.55186 53.18768 40.85265 NA
## 73 74 75 76 77 78 79 80
## 52.17036 40.85265 NA 24.70266 51.66170 53.44201 54.84083 42.37863
## 81 82 83 84 85 86 87 88
## 46.57509 19.48888 NA NA 55.98532 46.95658 28.89911 29.02628
## 89 90 91 92 93 94 95 96
## 45.68493 53.56918 50.77154 50.89871 29.15345 21.65069 28.39045 NA
## 97 98 99 100 101 102 103 104
## NA NA 51.02587 47.71957 44.92194 NA NA 43.01446
## 105 106 107 108 109 110 111 112
## 53.31485 38.56368 NA 27.62746 25.08416 33.22273 49.62705 42.76013
## 113 114 115 116 117 118 119 120
## 51.53453 23.17668 NA 45.55777 48.86406 45.93926 NA 44.41328
## 121 122 123 124 125 126 127 128
## 47.21091 48.73690 42.50580 39.83533 43.65029 41.86997 42.63296 30.67943
## 129 130 131 132 133 134 135 136
## 30.29793 50.64438 46.57509 47.84674 51.53453 48.60973 51.53453 48.86406
## 137 138 139 140 141 142 143 144
## 21.65069 32.84124 48.73690 47.08375 22.03219 48.86406 44.15895 48.86406
## 145 146 147 148 149 150 151 152
## 20.37904 36.27470 24.82983 21.14203 43.14163 NA 42.88729 35.25738
## 153
## 46.95658
Both na.omit and na.exclude leave the rows with missing values out of the fit, but only na.exclude keeps their positions (as NA) in the residuals and fitted values.
summary(airquality$Ozone)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 18.00 31.50 42.13 63.25 168.00 37
table(airquality$Ozone)
##
## 1 4 6 7 8 9 10 11 12 13 14 16 18 19 20 21 22 23 24 27
## 1 1 1 3 1 3 1 3 2 4 4 4 4 1 4 4 1 6 2 1
## 28 29 30 31 32 34 35 36 37 39 40 41 44 45 46 47 48 49 50 52
## 3 1 2 1 3 1 2 2 2 2 1 1 3 2 1 1 1 1 1 1
## 59 61 63 64 65 66 71 73 76 77 78 79 80 82 84 85 89 91 96 97
## 2 1 1 2 1 1 1 2 1 1 2 1 1 1 1 2 1 1 1 2
## 108 110 115 118 122 135 168
## 1 1 1 1 1 1 1
table(airquality$Ozone,useNA="ifany")
##
## 1 4 6 7 8 9 10 11 12 13 14 16 18 19 20 21
## 1 1 1 3 1 3 1 3 2 4 4 4 4 1 4 4
## 22 23 24 27 28 29 30 31 32 34 35 36 37 39 40 41
## 1 6 2 1 3 1 2 1 3 1 2 2 2 2 1 1
## 44 45 46 47 48 49 50 52 59 61 63 64 65 66 71 73
## 3 2 1 1 1 1 1 1 2 1 1 2 1 1 1 2
## 76 77 78 79 80 82 84 85 89 91 96 97 108 110 115 118
## 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 1
## 122 135 168 <NA>
## 1 1 1 37
table(airquality$Ozone, useNA="always")
##
## 1 4 6 7 8 9 10 11 12 13 14 16 18 19 20 21
## 1 1 1 3 1 3 1 3 2 4 4 4 4 1 4 4
## 22 23 24 27 28 29 30 31 32 34 35 36 37 39 40 41
## 1 6 2 1 3 1 2 1 3 1 2 2 2 2 1 1
## 44 45 46 47 48 49 50 52 59 61 63 64 65 66 71 73
## 3 2 1 1 1 1 1 1 2 1 1 2 1 1 1 2
## 76 77 78 79 80 82 84 85 89 91 96 97 108 110 115 118
## 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 1
## 122 135 168 <NA>
## 1 1 1 37
length(airquality$Ozone)
## [1] 153
# sort() drops NAs by default: x1 has 116 elements versus 153 in the column.
x1 <- sort(airquality$Ozone)
x1
## [1] 1 4 6 7 7 7 8 9 9 9 10 11 11 11 12 12 13 13
## [19] 13 13 14 14 14 14 16 16 16 16 18 18 18 18 19 20 20 20
## [37] 20 21 21 21 21 22 23 23 23 23 23 23 24 24 27 28 28 28
## [55] 29 30 30 31 32 32 32 34 35 35 36 36 37 37 39 39 40 41
## [73] 44 44 44 45 45 46 47 48 49 50 52 59 59 61 63 64 64 65
## [91] 66 71 73 73 76 77 78 78 79 80 82 84 85 85 89 91 96 97
## [109] 97 108 110 115 118 122 135 168
length(x1)
## [1] 116
# na.last = TRUE keeps the NAs and places them at the end, so the length stays 153.
x2 <- sort(airquality$Ozone, na.last = TRUE)
x2
## [1] 1 4 6 7 7 7 8 9 9 9 10 11 11 11 12 12 13 13
## [19] 13 13 14 14 14 14 16 16 16 16 18 18 18 18 19 20 20 20
## [37] 20 21 21 21 21 22 23 23 23 23 23 23 24 24 27 28 28 28
## [55] 29 30 30 31 32 32 32 34 35 35 36 36 37 37 39 39 40 41
## [73] 44 44 44 45 45 46 47 48 49 50 52 59 59 61 63 64 64 65
## [91] 66 71 73 73 76 77 78 78 79 80 82 84 85 85 89 91 96 97
## [109] 97 108 110 115 118 122 135 168 NA NA NA NA NA NA NA NA NA NA
## [127] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [145] NA NA NA NA NA NA NA NA NA
length(x2)
## [1] 153
mice::md.pattern(airquality) # missing-data pattern table from the mice package
## Wind Temp Month Day Solar.R Ozone
## 111 1 1 1 1 1 1 0
## 35 1 1 1 1 1 0 1
## 5 1 1 1 1 0 1 1
## 2 1 1 1 1 0 0 2
## 0 0 0 0 7 37 44
Aggregations for missing/imputed values Calculate or plot the amount of missing/imputed values in each variable and the amount of missing/imputed values in certain combinations of variables.
aggr(x, delimiter = NULL, plot = TRUE, ...)
## S3 method for class 'aggr'
plot(x, col = c("skyblue", "red", "orange"), bars = TRUE,
numbers = FALSE, prop = TRUE, combined = FALSE, varheight = FALSE,
only.miss = FALSE, border = par("fg"), sortVars = FALSE,
sortCombs = TRUE, ylabs = NULL, axes = TRUE, labels = axes,
cex.lab = 1.2, cex.axis = par("cex"), cex.numbers = par("cex"),
gap = 4, ...)
a<-VIM::aggr(airquality, prop=FALSE, numbers=TRUE) # from the VIM package
summary(a) # missing-data pattern
##
## Missings per variable:
## Variable Count
## Ozone 37
## Solar.R 7
## Wind 0
## Temp 0
## Month 0
## Day 0
##
## Missings in combinations of variables:
## Combinations Count Percent
## 0:0:0:0:0:0 111 72.549020
## 0:1:0:0:0:0 5 3.267974
## 1:0:0:0:0:0 35 22.875817
## 1:1:0:0:0:0 2 1.307190
matrix plot
Create a matrix plot, in which all cells of a data matrix are visualized by rectangles. Available data is coded according to a continuous color scheme, while missing/imputed data is visualized by a clearly distinguishable color.
matrixplot(x, delimiter = NULL, sortby = NULL, col = c("red", "orange"),
fixup = TRUE, xlim = NULL, ylim = NULL, main = NULL,
sub = NULL, xlab = NULL, ylab = NULL, axes = TRUE, labels = axes,
xpd = NULL, interactive = TRUE, ...)
# Matrix plot of the whole data frame with default settings.
VIM::matrixplot(airquality)
##
## Click in a column to sort by the corresponding variable.
## To regain use of the VIM GUI and the R console, click outside the plot region.
# Same plot, pre-sorted by Ozone.
# NOTE(review): interactive = TRUE appears to wait for mouse clicks (see the
# message above) — confirm this is wanted in non-interactive runs.
VIM::matrixplot(airquality, interactive = TRUE, sortby = "Ozone") #!
##
## Click in a column to sort by the corresponding variable.
## To regain use of the VIM GUI and the R console, click outside the plot region.
Scatterplot with additional information in the margins In addition to a standard scatterplot, information about missing/imputed values is shown in the plot margins. Furthermore, imputed values are highlighted in the scatterplot.
marginplot(x, delimiter = NULL, col = c("skyblue", "red", "red4", "orange",
"orange4"), alpha = NULL, pch = c(1, 16), cex = par("cex"),
numbers = TRUE, cex.numbers = par("cex"), zeros = FALSE, xlim = NULL,
ylim = NULL, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
ann = par("ann"), axes = TRUE, frame.plot = axes, ...)
# Scatterplot of Ozone against Solar.R with missing-value information in the margins.
VIM::marginplot(airquality[, c("Ozone", "Solar.R")])
# Same plot with a solid plotting symbol and a custom colour palette.
VIM::marginplot(
  airquality[c("Ozone", "Solar.R")],
  pch = 20,
  col = c("darkgray", "red", "blue")
)
Marginplot Matrix Create a scatterplot matrix with information about missing/imputed values in the plot margins of each panel.
marginmatrix(x, delimiter = NULL, col = c("skyblue", "red", "red4",
"orange", "orange4"), alpha = NULL, ...)
# Margin-plot matrix of all columns except the fifth one (Month).
VIM::marginmatrix(airquality[,-5])
Barplot with information about missing/imputed values: Barplot with highlighting of missing/imputed values in other variables by splitting each bar into two parts. Additionally, information about missing/imputed values in the variable of interest is shown on the right hand side.
barMiss(x, delimiter = NULL, pos = 1, selection = c("any", "all"),
col = c("skyblue", "red", "skyblue4", "red4", "orange", "orange4"),
border = NULL, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
axes = TRUE, labels = axes, only.miss = TRUE, miss.labels = axes,
interactive = TRUE, ...)
# Bar plot on the Month and Ozone columns; Month comes first, so with the
# default pos = 1 it is the variable of interest.
VIM::barMiss(airquality[,c("Month","Ozone")])
##
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
airquality[,c("Month","Ozone")] # print the data behind the plot
Rug representation of missing/imputed values: Add a rug representation of missing/imputed values in only one of the variables to scatterplots.
rugNA(x, y, ticksize = NULL, side = 1, col = "red", alpha = NULL,
miss = NULL, lwd = 0.5, ...)
# Base scatterplot of Ozone (x) against Solar.R (y); the rugs are added to it.
plot(airquality$Ozone, airquality$Solar.R)
# side = 1: shows the missing values of the y variable on the x axis.
VIM::rugNA(airquality$Ozone, airquality$Solar.R,side=1)
# side = 2, drawn in orange with ticksize = 1. By adding the `miss` argument,
# imputed values can be shown instead of missing ones; see ?rugNA.
VIM::rugNA(airquality$Ozone, airquality$Solar.R,ticksize = 1, col= "orange", side=2)
Scatterplot matrix with information about missing/imputed values: Scatterplot matrix in which observations with missing/imputed values in certain variables are highlighted.
scattmatrixMiss(x, delimiter = NULL, highlight = NULL,
selection = c("any", "all"), plotvars = NULL, col = c("skyblue", "red",
"orange"), alpha = NULL, pch = c(1, 3), lty = par("lty"),
diagonal = c("density", "none"), interactive = TRUE, ...)
# All variables are highlighted; the `delimiter` argument is used for imputed
# values, see ?scattmatrixMiss.
VIM::scattmatrixMiss(airquality)
##
## Click in a diagonal panel to add to or remove from the highlight selection.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
##
## Highlighted missings in any of the variables 'Ozone', 'Solar.R', 'Wind', 'Temp', 'Month', 'Day'.
# Highlight only the observations with missing values in Ozone.
VIM::scattmatrixMiss(airquality, highlight = "Ozone")
##
## Click in a diagonal panel to add to or remove from the highlight selection.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
##
## Highlighted 'missings' in variable 'Ozone'.
Parallel boxplots with information about missing/imputed values: Boxplot of one variable of interest plus information about missing/imputed values in other variables.
pbox(x, delimiter = NULL, pos = 1, selection = c("none", "any", "all"),
col = c("skyblue", "red", "red4", "orange", "orange4"), numbers = TRUE,
cex.numbers = par("cex"), xlim = NULL, ylim = NULL, main = NULL,
sub = NULL, xlab = NULL, ylab = NULL, axes = TRUE,
frame.plot = axes, labels = axes, interactive = TRUE, ...)
VIM::pbox(airquality) # parallel boxplots for observed and missing values
## Warning in createPlot(main, sub, xlab, ylab, labels, ca$at): not enough space to
## display frequencies
##
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
Parallel coordinate plot with information about missing/imputed values: Parallel coordinate plot with adjustments for missing/imputed values. Missing values in the plotted variables may be represented by a point above the corresponding coordinate axis to prevent disconnected lines. In addition, observations with missing/imputed values in selected variables may be highlighted.
parcoordMiss(x, delimiter = NULL, highlight = NULL, selection = c("any",
"all"), plotvars = NULL, plotNA = TRUE, col = c("skyblue", "red",
"skyblue4", "red4", "orange", "orange4"), alpha = NULL, lty = par("lty"),
xlim = NULL, ylim = NULL, main = NULL, sub = NULL, xlab = NULL,
ylab = NULL, labels = TRUE, xpd = NULL, interactive = TRUE, ...)
VIM::parcoordMiss(airquality) # parallel coordinate plot
##
## Click on a coordinate axis to add to or remove from the highlight selection.
## Click in the top margin to toggle visualizing missing values in the plot variables.
## To regain use of the VIM GUI and the R console, click in any of the other plot margins.
##
## Highlighted missings in any of the variables 'Ozone', 'Solar.R', 'Wind', 'Temp', 'Month', 'Day'.
Bivariate jitter plot: Create a bivariate jitter plot.
scattJitt(x, delimiter = NULL, col = c("skyblue", "red", "red4", "orange",
"orange4"), alpha = NULL, cex = par("cex"), col.line = "lightgrey",
lty = "dashed", lwd = par("lwd"), numbers = TRUE,
cex.numbers = par("cex"), main = NULL, sub = NULL, xlab = NULL,
ylab = NULL, axes = TRUE, frame.plot = axes, labels = c("observed",
"missing", "imputed"), ...)
VIM::scattJitt(airquality[,1:2]) # jittered scatterplot for Ozone and Solar.R
# Plain scatterplot of the same two columns.
plot(airquality[,1:2])
Spineplot with information about missing/imputed values: Spineplot or spinogram with highlighting of missing/imputed values in other variables by splitting each cell into two parts. Additionally, information about missing/imputed values in the variable of interest is shown on the right hand side.
spineMiss(x, delimiter = NULL, pos = 1, selection = c("any", "all"),
breaks = "Sturges", right = TRUE, col = c("skyblue", "red", "skyblue4",
"red4", "orange", "orange4"), border = NULL, main = NULL, sub = NULL,
xlab = NULL, ylab = NULL, axes = TRUE, labels = axes,
only.miss = TRUE, miss.labels = axes, interactive = TRUE, ...)
# Spine plot / spinogram on the Month and Solar.R columns; Month comes first,
# so with the default pos = 1 it is the variable of interest.
VIM::spineMiss(airquality[,c("Month","Solar.R")])
##
## Click in in the left margin to switch to the previous variable or in the right margin to switch to the next variable.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
Scatterplot with information about missing/imputed values
In addition to a standard scatterplot, lines are plotted for the missing values in one variable. If there are imputed values, they will be highlighted.
scattMiss(x, delimiter = NULL, side = 1, col = c("skyblue", "red",
"orange", "lightgrey"), alpha = NULL, lty = c("dashed", "dotted"),
lwd = par("lwd"), quantiles = c(0.5, 0.975), inEllipse = FALSE,
zeros = FALSE, xlim = NULL, ylim = NULL, main = NULL, sub = NULL,
xlab = NULL, ylab = NULL, interactive = TRUE, ...)
VIM::scattMiss(airquality[,c("Ozone","Solar.R")]) # missing data are shown as lines
##
## Click in bottom or left margin to change the 'side' argument accordingly.
## To regain use of the VIM GUI and the R console, click anywhere else in the graphics window.
Mosaic plot with information about missing/imputed values
Create a mosaic plot with information about missing/imputed values.
mosaicMiss(x, delimiter = NULL, highlight = NULL, selection = c("any",
"all"), plotvars = NULL, col = c("skyblue", "red", "orange"),
labels = NULL, miss.labels = TRUE, ...)
TODO: fix the data
# Mosaic plot of columns 5:6 (Month, Day) with missingness information taken
# from column 4 (Temp). `highlight` is written out in full instead of relying
# on partial argument matching of `high`.
# NOTE(review): the missing-value summary reports 0 missings for Temp, so
# nothing is likely to be highlighted; highlight = "Ozone" may have been the
# intent -- confirm.
VIM::mosaicMiss(airquality, highlight = 4, plotvars = 5:6)
# Help pages for further missing-data plots. One lookup per line: several `?`
# calls chained on a single line parse as one expression, not six lookups.
?growdotMiss
?colormapMiss
?spineMiss
?mosaicMiss
?histMiss
?mapMiss
# Further missing-data visualisations of airquality, this time from the
# Amelia and naniar packages.
Amelia::missmap(airquality)
naniar::vis_miss(airquality)
naniar::gg_miss_upset(airquality)
# Disabled variant of the upset plot on another data set (riskfactors).
#naniar::gg_miss_upset(riskfactors)
naniar::gg_miss_var(airquality)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
# naniar plot of missingness by case for airquality.
naniar::gg_miss_case(airquality)
# NOTE(review): left disabled; gg_miss_fct presumably also needs a factor
# column argument -- confirm against the naniar documentation.
#naniar::gg_miss_fct(airquality)